/* 

Author: Ramin Izadi / Joonas Tuhkuri
Updated: July 25 2024

Description:

This do-file compiles the main person_year file. The IDs in persons.dta are used as the baseline to keep the data size in the relevant range (i.e. the code starts by taking the IDs from persons.dta). persons.dta contains baseline person info, particularly the MIL data.


*/

*Preliminaries
*Reset Stata's state once, then define the project root in a single place so
*that the working directory and every later path are derived from the same
*value.

	clear all
	global project_root "W:\Ramin\Stereotypes revision"
	cd "${project_root}"
	
*Load the baseline persons file

	use "W:\data2018\persons.dta", clear

*Restrict the sample, then attach shnro through the person id crosswalk

	* Keep synvuosi (presumably birth year - confirm) between 1962 and 1979;
	* inrange() is false for missing values, so those rows are dropped too.
	keep if inrange(synvuosi, 1962, 1979)
	keep if T2_6 != . // or keep if pluku != .

	* Only ids found in both files survive (note: the match is imperfect).
	merge 1:1 id using "${project_root}\data_prep\xwalk_id_shnro.dta", keep(match) nogenerate

	* From here on only the shnro identifier is needed as the merge key.
	keep shnro
	
*Attach the datasets prepared earlier from FOLK

	* Basic FOLK file: the using data may hold several rows per shnro.
	* Persons without a FOLK record are kept; FOLK-only rows are discarded.
	merge 1:m shnro using "${project_root}\data_prep\folk_perus_87_18_build.dta", keep(master match) nogenerate
	order shnro vuosi
	sort shnro vuosi

	* Second FOLK file (tyosuhde), matched one-to-one on shnro and vuosi.
	* Again only master-only and matched rows are retained.
	merge 1:1 shnro vuosi using "${project_root}\data_prep\folk_tyosuhde_build.dta", keep(master match) nogenerate
	order shnro vuosi
	sort shnro vuosi

* Merge with industry xwalk

	* The first three characters of toimiala form the crosswalk key.
	gen toimiala_threedigit = substr(toimiala, 1, 3)

	* force: values that are not purely numeric become missing instead of
	* stopping the run.
	destring toimiala_threedigit, generate(original_industry) force

	* keep(master match): crosswalk rows that match no observation in memory
	* are not appended, so the data keep one row per existing observation
	* instead of gaining extra rows that carry no person identifier.
	merge m:1 vuosi original_industry using "${project_root}\data_prep\xwalk_industry_3digit.dta", keep(master match) nogenerate
	
*Numeric plant identifier built from sykstun (firm or establishment)
	egen plant_id = group(sykstun)

*Remove rows that lack a person identifier
	drop if missing(shnro)

*Numeric person identifier built from shnro
	egen shnro_id = group(shnro)

*Put the key variables first
	order shnro_id vuosi plant_id, first

*Declare the panel: shnro_id is the panel variable, vuosi the time variable
	sort shnro_id vuosi
	xtset shnro_id vuosi

*Write the full person-year file
	save "${project_root}\data_prep\person_year.dta", replace

*Shrink data for R
*Only the variables listed below are kept; everything else in memory is
*dropped (the full file has already been saved above).
	keep shnro vuosi ika tyotu_k tyrtuo_k industry ammattikoodi_k plant_id ptoim1 skunta ututku_ala ututku_aste oyr_omist_tyyppi

*Save data for R
*The path is built from ${project_root}, like the other project paths in
*this do-file, so moving the project only requires changing the global.
	save "${project_root}\data_prep\person_year_R.dta", replace



			 	